In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's theme to all subsequent plots; color_codes=True asks seaborn to
# remap matplotlib's shorthand color codes (e.g. 'b', 'g', 'r') to its palette.
sns.set(color_codes=True)
In [4]:
# Location of the cars CSV. It is bound to `url` because a later cell calls
# pd.read_csv(url); without this binding that cell fails with NameError on a
# fresh kernel.
url = 'https://raw.githubusercontent.com/Akhilvijaykumar/CARS.csv/main/CARS.csv'

# Load the dataset once; the cells below read from `df`.
df = pd.read_csv(url)
     
In [5]:
# Box plot of the MSRP column.
sns.boxplot(data=df, x='MSRP')
Out[5]:
<AxesSubplot:xlabel='MSRP'>
In [6]:
# Scatter of Horsepower (x axis) against MSRP (y axis) on a 5x5 inch figure.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x=df['Horsepower'], y=df['MSRP'])
ax.set(
    title='Scatter plot between MSRP and Horsepower',
    xlabel='Horsepower',
    ylabel='MSRP',
)
plt.show()
In [7]:
def find_outliers_IQR(df, whisker=1.5):
    """Return the entries of ``df`` that fall outside the IQR fences.

    The fences are ``q1 - whisker * IQR`` and ``q3 + whisker * IQR``, where
    ``q1`` and ``q3`` are the 0.25 and 0.75 quantiles of ``df`` and
    ``IQR = q3 - q1``.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Numeric data to screen. When a Series is passed, the result contains
        only the outlying entries, with their original index labels.
    whisker : float, default 1.5
        Multiplier applied to the IQR when placing the fences. The default
        reproduces the previously hard-coded value of 1.5.

    Returns
    -------
    Same type as ``df``
        ``df`` indexed by the boolean outlier mask.
    """
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1

    lower_fence = q1 - whisker * iqr
    upper_fence = q3 + whisker * iqr

    # Strict comparisons: values lying exactly on a fence are not outliers.
    return df[(df < lower_fence) | (df > upper_fence)]
     
In [8]:
# Collect the MSRP outliers and report how many there are and their extremes.
outliers = find_outliers_IQR(df['MSRP'])

print(f'number of outliers: {len(outliers)}')
print(f'max outlier value: {outliers.max()}')
print(f'min outlier value: {outliers.min()}')
number of outliers: 996
max outlier value: 2065902
min outlier value: 74100
In [9]:
import plotly.express as px
# Histogram of the MSRP column, drawn with Plotly Express.
fig = px.histogram(data_frame=df, x='MSRP')
fig.show()
In [10]:
import pandas as pd
from matplotlib import pyplot as plt

# Reuse the DataFrame loaded at the top of the notebook. The previous version
# re-read the CSV through `url`, a name no cell defined, so this cell raised
# NameError on a fresh kernel.
name = df['Model'].head(10)
price = df['MSRP'].head(10)

# Figure size
fig, ax = plt.subplots(figsize=(10, 7))

# Vertical bar plot: MSRP of the first 10 rows, labelled by model
ax.bar(name, price)
ax.set(
    title='MSRP of the first 10 rows by model',
    xlabel='Model',
    ylabel='MSRP',
)

# Show plot
plt.show()
In [11]:
# Hard-coded labels and values for the pie chart (not read from the dataset).
cars = ['AUDI', 'BMW', 'FORD', 'TESLA', 'JAGUAR', 'MERCEDES']
data = [23, 17, 35, 29, 12, 41]

# Draw one wedge per entry on a 10x7 inch figure.
fig, ax = plt.subplots(figsize=(10, 7))
ax.pie(data, labels=cars)

# Render the figure.
plt.show()
     
In [ ]:
 
In [ ]: